#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
library(vegan)
library(grid)
library(gridExtra)
library(reshape2)
# Load biom file.
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
biom
# Load and merge sample metadata with read data (biom).
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
sample_data(biom) <- sam.data
sample_data(biom)
## Sample Data: [52 samples by 6 sample variables]:
## TreatmentGroup Site Date Description
## C178N1 Early North 178 Sample of day 178 at site North 1
## C178P1 Early Point 178 Sample of day 178 at site Point 1
## C185P2 Early Point 185 Sample of day 185 at site Point 2
## C206N2 Late North 206 Sample of day 206 at site North 2
## C206P1 Late Point 206 Sample of day 206 at site Point 1
## C206P2 Late Point 206 Sample of day 206 at site Point 2
## C214P1 Late Point 214 Sample of day 214 at site Point 1
## C214P2 Late Point 214 Sample of day 214 at site Point 2
## C214P3 Late Point 214 Sample of day 214 at site Point 3
## C214S1 Late South 214 Sample of day 214 at site South 1
## C214S2 Late South 214 Sample of day 214 at site South 2
## C214S3 Late South 214 Sample of day 214 at site South 3
## C185P1 Early Point 185 Sample of day 185 at site Point 1
## C185P3 Early Point 185 Sample of day 185 at site Point 3
## C199P3 Late Point 199 Sample of day 199 at site Point 3
## C199S2 Late South 199 Sample of day 199 at site South 2
## C199S3 Late South 199 Sample of day 199 at site South 3
## C206N1 Late North 206 Sample of day 206 at site North 1
## C178P2 Early Point 178 Sample of day 178 at site Point 2
## C199N3 Late North 199 Sample of day 199 at site North 3
## C206S1 Late South 206 Sample of day 206 at site South 1
## C214N3 Late North 214 Sample of day 214 at site North 3
## C199N2 Late North 199 Sample of day 199 at site North 2
## C206N3 Late North 206 Sample of day 206 at site North 3
## C206S2 Late South 206 Sample of day 206 at site South 2
## C199N1 Late North 199 Sample of day 199 at site North 1
## C199P1 Late Point 199 Sample of day 199 at site Point 1
## C199S1 Late South 199 Sample of day 199 at site South 1
## C214N1 Late North 214 Sample of day 214 at site North 1
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C199P2 Late Point 199 Sample of day 199 at site Point 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172S3 Early South 172 Sample of day 172 at site South 3
## C178S2 Early South 178 Sample of day 178 at site South 2
## C178P3 Early Point 178 Sample of day 178 at site Point 3
## C178S3 Early South 178 Sample of day 178 at site South 3
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172S1 Early South 172 Sample of day 172 at site South 1
## C178N3 Early North 178 Sample of day 178 at site North 3
## C185N2 Early North 185 Sample of day 185 at site North 2
## C185N3 Early North 185 Sample of day 185 at site North 3
## C185S3 Early South 185 Sample of day 185 at site South 3
## C214N2 Late North 214 Sample of day 214 at site North 2
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C185S2 Early South 185 Sample of day 185 at site South 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## C185N1 Early North 185 Sample of day 185 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C178S1 Early South 178 Sample of day 178 at site South 1
## C185S1 Early South 185 Sample of day 185 at site South 1
## C172S2 Early South 172 Sample of day 172 at site South 2
## C178N2 Early North 178 Sample of day 178 at site North 2
## SampleID.1 DateSite
## C178N1 C178N1 178 North
## C178P1 C178P1 178 Point
## C185P2 C185P2 185 Point
## C206N2 C206N2 206 North
## C206P1 C206P1 206 Point
## C206P2 C206P2 206 Point
## C214P1 C214P1 214 Point
## C214P2 C214P2 214 Point
## C214P3 C214P3 214 Point
## C214S1 C214S1 214 South
## C214S2 C214S2 214 South
## C214S3 C214S3 214 South
## C185P1 C185P1 185 Point
## C185P3 C185P3 185 Point
## C199P3 C199P3 199 Point
## C199S2 C199S2 199 South
## C199S3 C199S3 199 South
## C206N1 C206N1 206 North
## C178P2 C178P2 178 Point
## C199N3 C199N3 199 North
## C206S1 C206S1 206 South
## C214N3 C214N3 214 North
## C199N2 C199N2 199 North
## C206N3 C206N3 206 North
## C206S2 C206S2 206 South
## C199N1 C199N1 199 North
## C199P1 C199P1 199 Point
## C199S1 C199S1 199 South
## C214N1 C214N1 214 North
## C172P1 C172P1 172 Point
## C199P2 C199P2 199 Point
## C172N3 C172N3 172 North
## C172S3 C172S3 172 South
## C178S2 C178S2 178 South
## C178P3 C178P3 178 Point
## C178S3 C178S3 178 South
## C172N1 C172N1 172 North
## C172S1 C172S1 172 South
## C178N3 C178N3 178 North
## C185N2 C185N2 185 North
## C185N3 C185N3 185 North
## C185S3 C185S3 185 South
## C214N2 C214N2 214 North
## C172P2 C172P2 172 Point
## C185S2 C185S2 185 South
## C172P3 C172P3 172 Point
## C185N1 C185N1 185 North
## C172N2 C172N2 172 North
## C178S1 C178S1 178 South
## C185S1 C185S1 185 South
## C172S2 C172S2 172 South
## C178N2 C178N2 178 North
# Normalize by relative abundance.
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Ordination plot, k = 3.
ordNMDS.k3 <- ordinate(biom.relabund, method="NMDS", distance="bray", k=3)
## Run 0 stress 0.07869986
## Run 1 stress 0.07869949
## ... New best solution
## ... Procrustes: rmse 0.0002305481 max resid 0.001105219
## ... Similar to previous best
## Run 2 stress 0.07869962
## ... Procrustes: rmse 0.0002017883 max resid 0.001019135
## ... Similar to previous best
## Run 3 stress 0.07869844
## ... New best solution
## ... Procrustes: rmse 0.0004553477 max resid 0.001521332
## ... Similar to previous best
## Run 4 stress 0.08334584
## Run 5 stress 0.07874264
## ... Procrustes: rmse 0.002511844 max resid 0.01380174
## Run 6 stress 0.07869964
## ... Procrustes: rmse 0.0005900723 max resid 0.002571066
## ... Similar to previous best
## Run 7 stress 0.08334478
## Run 8 stress 0.07872072
## ... Procrustes: rmse 0.001339293 max resid 0.005271881
## ... Similar to previous best
## Run 9 stress 0.07870018
## ... Procrustes: rmse 0.0006557135 max resid 0.002997354
## ... Similar to previous best
## Run 10 stress 0.07869808
## ... New best solution
## ... Procrustes: rmse 0.0001766944 max resid 0.0005861921
## ... Similar to previous best
## Run 11 stress 0.07869856
## ... Procrustes: rmse 0.0002775179 max resid 0.001302053
## ... Similar to previous best
## Run 12 stress 0.08337886
## Run 13 stress 0.07869809
## ... Procrustes: rmse 0.0001472413 max resid 0.0003891141
## ... Similar to previous best
## Run 14 stress 0.07869902
## ... Procrustes: rmse 0.0003348988 max resid 0.001535
## ... Similar to previous best
## Run 15 stress 0.07869916
## ... Procrustes: rmse 0.0003981774 max resid 0.001889146
## ... Similar to previous best
## Run 16 stress 0.08334287
## Run 17 stress 0.08804081
## Run 18 stress 0.07869856
## ... Procrustes: rmse 0.0002547032 max resid 0.001142475
## ... Similar to previous best
## Run 19 stress 0.08695913
## Run 20 stress 0.07906707
## ... Procrustes: rmse 0.00900781 max resid 0.03814892
## *** Solution reached
ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "Date") + geom_point(size=2)
ord.k3 + theme_bw() + scale_colour_hue(h=c(300, 500))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "Date") + geom_point(size=5)
ord.k3 + theme_bw() + scale_colour_hue(h=c(300, 500))+
geom_point(colour="white", size = 3)+
geom_point(colour="black", size = 1)+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "Date") + geom_point(size=5)
ord.k3 + theme_bw() + scale_colour_manual(values=c("grey20", "grey30", "grey40", "grey50", "grey60", "grey70")) +
geom_point(colour="white", size = 3)+
geom_point(colour="black", size = 1)+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# PERMANOVA.
df = as(sample_data(biom), "data.frame")
d = phyloseq::distance(biom, "bray")
clado.adonis = adonis(d ~ Date*Site, df)
clado.adonis
##
## Call:
## adonis(formula = d ~ Date * Site, data = df)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Date 5 3.1945 0.63890 17.4143 0.41338 0.001 ***
## Site 2 1.5561 0.77804 21.2068 0.20136 0.001 ***
## Date:Site 10 1.7298 0.17298 4.7148 0.22384 0.001 ***
## Residuals 34 1.2474 0.03669 0.16142
## Total 51 7.7278 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
biom.rich.est <- estimate_richness(biom, measures = NULL)
biom.rich.est$SampleID.1 <- row.names(biom.rich.est)
biom.rich.est <- merge(biom.rich.est, sam.data, by = "SampleID.1")
biom.rich.est$Date <- as.character(biom.rich.est$Date)
biom.rich.est$Date <- as.numeric(biom.rich.est$Date)
head(biom.rich.est)
## SampleID.1 Observed Chao1 se.chao1 ACE se.ACE Shannon
## 1 C172N1 5139 6770.782 99.67941 7316.921 48.50633 5.444561
## 2 C172N2 5236 7017.650 107.98761 7420.840 47.98329 5.649933
## 3 C172N3 6890 8499.605 90.45120 9053.801 49.75824 5.980043
## 4 C172P1 2936 4855.835 138.81016 5427.422 47.24285 4.851552
## 5 C172P2 4833 7043.680 130.90171 7619.611 51.59008 5.306940
## 6 C172P3 3750 5099.247 96.43281 5368.421 41.19882 4.824680
## Simpson InvSimpson Fisher TreatmentGroup Site Date
## 1 0.9874631 79.76426 896.0810 Early North 172
## 2 0.9892749 93.23962 1001.0057 Early North 172
## 3 0.9899773 99.77399 1361.3171 Early North 172
## 4 0.9767986 43.10086 520.1978 Early Point 172
## 5 0.9803746 50.95444 904.5831 Early Point 172
## 6 0.9400545 16.68183 655.5148 Early Point 172
## Description DateSite
## 1 Sample of day 172 at site North 1 172 North
## 2 Sample of day 172 at site North 2 172 North
## 3 Sample of day 172 at site North 3 172 North
## 4 Sample of day 172 at site Point 1 172 Point
## 5 Sample of day 172 at site Point 2 172 Point
## 6 Sample of day 172 at site Point 3 172 Point
cbPalette <- c("#b5b5b5", "#777777", "#212121")
# Plot observed richness.
biom.rich.est.obs <- summarySE(biom.rich.est, measurevar="Observed", groupvars=c("Date","Site"))
p.obs <- ggplot(biom.rich.est.obs, aes(x=Date, y=Observed, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=Observed-se, ymax=Observed+se)) + scale_colour_manual(values=cbPalette)
#scale_colour_hue(h=c(400, 120))
p.obs + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Plot Shannon index richness.
biom.rich.est.sha <- summarySE(biom.rich.est, measurevar="Shannon", groupvars=c("Date","Site"))
p.sha <- ggplot(biom.rich.est.sha, aes(x=Date, y=Shannon, color = Site, shape = Site)) +
geom_point(size = 2) + geom_errorbar(aes(ymin=Shannon-se, ymax=Shannon+se)) + scale_colour_hue(h=c(400, 120))
p.sha +
theme_bw() +
theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Plot Shannon index richness.
biom.rich.est.sha <- summarySE(biom.rich.est, measurevar="Shannon", groupvars=c("Date","Site"))
p.sha <- ggplot(biom.rich.est.sha, aes(x=Date, y=Shannon, color = Site, shape = Site)) +
geom_point(size = 2) + geom_errorbar(aes(ymin=Shannon-se, ymax=Shannon+se)) + scale_colour_manual(values=cbPalette)
p.sha +
theme_bw() +
theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Find top 30 genera and subset biom.relabund.
sort.genera <- sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Genus"], sum), TRUE)
top.genera <- sort.genera[1:30]
top.genera.list <- names(top.genera)
biom.relabund.subset = subset_taxa(biom.relabund, Genus %in% top.genera.list)
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Genus %in% as.factor(top.genera.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9093 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 9093 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 9093 tips and 9089 internal nodes ]
relabund.top.genera <- psmelt(biom.relabund.subset.taxa)
relabund.top.genera.genus <- relabund.top.genera%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Phylum, Family, Genus)
head(relabund.top.genera.genus)
## Source: local data frame [6 x 10]
## Groups: Sample, Genus [6]
##
## Sample GenusAbundance TreatmentGroup Site Date Phylum
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C178S2 0.14728018 Early South 178 Proteobacteria
## 2 C206N2 0.07541493 Late North 206 [Thermi]
## 3 C199S1 0.12408425 Late South 199 Proteobacteria
## 4 C185S1 0.12728636 Early South 185 Proteobacteria
## 5 C199S3 0.10486769 Late South 199 Proteobacteria
## 6 C172S1 0.10894857 Early South 172 Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Genus <fctr>, Sample <chr>,
## # Genus <fctr>
# Summary of genus abundance of top 30 genera.
relabund.top.genera.genus.est <- summarySE(relabund.top.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.top.genera.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Armatimonas 3 0.0082179013 0.0037601213 0.0021709070
## 2 North 172 Bdellovibrio 3 0.0023540936 0.0001963925 0.0001133873
## 3 North 172 Cellvibrio 3 0.0058399987 0.0066066072 0.0038143264
## 4 North 172 CM44 3 0.0088329518 0.0005298073 0.0003058844
## 5 North 172 Crenothrix 3 0.0008092387 0.0004823472 0.0002784833
## 6 North 172 Dechloromonas 3 0.0019270556 0.0026318679 0.0015195096
## ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0164117220
## 4 0.0013161143
## 5 0.0011982168
## 6 0.0065379223
relabund.top.genera.genus.est$Date <- as.character(relabund.top.genera.genus.est$Date)
relabund.top.genera.genus.est$Date <- as.numeric(relabund.top.genera.genus.est$Date)
# Plot summary of genus abundance of top 30 genera.
p <- ggplot(relabund.top.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Find methanotrophic bacteria by genus.
methanolist <- read.table(file = "taxa-of-interest/methanos.txt")
methanolist <- as.vector(methanolist$V1)
biom.relabund.methanos <- subset_taxa(biom.relabund, Genus %in% as.factor(methanolist))
biom.relabund.methanos
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 567 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 567 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 567 tips and 566 internal nodes ]
relabund.methanos <- psmelt(biom.relabund.methanos)
relabund.methanos.genus <- relabund.methanos%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Phylum, Family, Genus)
relabund.methanos.genus.est <- summarySE(relabund.methanos.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.methanos.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Crenothrix 3 8.092387e-04 4.823472e-04 2.784833e-04
## 2 North 172 Methylibium 3 6.600147e-04 3.800437e-04 2.194183e-04
## 3 North 172 Methylocaldum 3 7.717447e-05 5.988775e-05 3.457621e-05
## 4 North 172 Methylomicrobium 3 0.000000e+00 0.000000e+00 0.000000e+00
## 5 North 172 Methylomonas 3 1.097031e-04 2.736128e-06 1.579704e-06
## 6 North 172 Methylosinus 3 4.677898e-05 2.432451e-05 1.404376e-05
## ci
## 1 1.198217e-03
## 2 9.440809e-04
## 3 1.487694e-04
## 4 0.000000e+00
## 5 6.796919e-06
## 6 6.042543e-05
relabund.methanos.genus.est$Date <- as.character(relabund.methanos.genus.est$Date)
relabund.methanos.genus.est$Date <- as.numeric(relabund.methanos.genus.est$Date)
# Plot summary of genus abundance of methanotrophic genera.
p <- ggplot(relabund.methanos.genus.est, aes(x=Date, y=GenusAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Plot summary of genus abundance of methanotrophic genera.
p <- ggplot(relabund.methanos.genus.est, aes(x=Date, y=GenusAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_manual(values=cbPalette)
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Find all genera
all.genera <- sort(get_taxa_unique(biom.relabund, "Genus"), decreasing=FALSE)
biom.relabund.all.genera <- subset_taxa(biom.relabund, Genus %in% as.factor(all.genera))
biom.relabund.all.genera <- psmelt(biom.relabund.all.genera)
biom.relabund.all.genera.genus <- biom.relabund.all.genera%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
biom.relabund.all.genera.genus.est <- summarySE(biom.relabund.all.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(biom.relabund.all.genera.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 A17 3 1.044934e-05 7.240333e-06 4.180208e-06
## 2 North 172 Achromobacter 3 1.205785e-06 2.088482e-06 1.205785e-06
## 3 North 172 Acidaminobacter 3 2.030206e-05 3.516421e-05 2.030206e-05
## 4 North 172 Acidocella 3 0.000000e+00 0.000000e+00 0.000000e+00
## 5 North 172 Acidovorax 3 4.023211e-05 3.508560e-05 2.025668e-05
## 6 North 172 Acinetobacter 3 1.395043e-04 1.803168e-04 1.041059e-04
## ci
## 1 1.798598e-05
## 2 5.188076e-06
## 3 8.735273e-05
## 4 0.000000e+00
## 5 8.715747e-05
## 6 4.479317e-04
biom.relabund.all.genera.genus.est$Date <- as.character(biom.relabund.all.genera.genus.est$Date)
biom.relabund.all.genera.genus.est$Date <- as.numeric(biom.relabund.all.genera.genus.est$Date)
p <- ggplot(biom.relabund.all.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# We want to calculate the total relative abundance of each phylum.
# "Melt" the phyloseq data into a dataframe and then take the top X most abundant phyla.
biom.melt <- psmelt(biom.relabund)
biom.melt.sorted <- biom.melt %>%
group_by(Sample,Phylum) %>%
summarize(PhyAbund = sum(Abundance))%>%
group_by(Phylum)%>%
summarize(MeanPhyAbund = mean(PhyAbund))%>%
arrange(-MeanPhyAbund)
# List of nPhyla top most abundant phyla.
nPhyla =16
PhylumList <- biom.melt.sorted[1:nPhyla,1]
PhylumList <- PhylumList[is.na(PhylumList)==FALSE,]
PhylumList <- levels(droplevels(as.factor(PhylumList$Phylum)))
PhylumList
## [1] "[Thermi]" "Acidobacteria" "Actinobacteria"
## [4] "Armatimonadetes" "Bacteroidetes" "Chlorobi"
## [7] "Chloroflexi" "Cyanobacteria" "Gemmatimonadetes"
## [10] "GN02" "Planctomycetes" "Proteobacteria"
## [13] "SR1" "TM7" "Verrucomicrobia"
# Subset biom.melt for phyla.
biom.subset <- subset_taxa(biom.relabund, Phylum %in% PhylumList)
biom.subset.melt <- psmelt(biom.subset)
biom.subset.melt.sorted = biom.subset.melt %>%
group_by(Sample,Site,Date,Phylum) %>%
summarize(PhyAbund = sum(Abundance))
# Summarize phylum abundances.
biom.subset.melt.sorted.est <- summarySE(biom.subset.melt.sorted, measurevar="PhyAbund", groupvars=c("Site","Date", "Phylum"))
head(biom.subset.melt.sorted.est)
## Site Date Phylum N PhyAbund sd se
## 1 North 172 [Thermi] 3 0.035290992 0.0143074443 0.0082604068
## 2 North 172 Acidobacteria 3 0.053632305 0.0245078281 0.0141496011
## 3 North 172 Actinobacteria 3 0.042155944 0.0102741604 0.0059317892
## 4 North 172 Armatimonadetes 3 0.008399230 0.0037857001 0.0021856750
## 5 North 172 Bacteroidetes 3 0.396831763 0.0276493911 0.0159633834
## 6 North 172 Chlorobi 3 0.000709788 0.0005910336 0.0003412334
## ci
## 1 0.035541662
## 2 0.060880820
## 3 0.025522429
## 4 0.009404200
## 5 0.068684895
## 6 0.001468209
biom.subset.melt.sorted.est$Date <- as.character(biom.subset.melt.sorted.est$Date)
biom.subset.melt.sorted.est$Date <- as.numeric(biom.subset.melt.sorted.est$Date)
# Plot top phyla abundances.
p <- ggplot(biom.subset.melt.sorted.est, aes(x=Date, y=PhyAbund, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=PhyAbund-se, ymax=PhyAbund+se)) + facet_wrap(~Phylum, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400,120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + labs(x="Date",y="Relative Abundance") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Plot top phyla abundances.
p <- ggplot(biom.subset.melt.sorted.est, aes(x=Date, y=PhyAbund, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=PhyAbund-se, ymax=PhyAbund+se)) + facet_wrap(~Phylum, ncol = 3, scales="free_y") + scale_colour_manual(values=cbPalette)
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + labs(x="Date",y="Relative Abundance") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Find top 30 classes and subset biom.relabund.
sort.classes <- sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Class"], sum), TRUE)
top.classes <- sort.classes[1:30]
top.classes.list <- names(top.classes)
biom.relabund.subset = subset_taxa(biom.relabund, Class %in% top.classes.list)
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Class %in% as.factor(top.classes.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 38156 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 38156 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 38156 tips and 38142 internal nodes ]
relabund.top.classes <- psmelt(biom.relabund.subset.taxa)
relabund.top.classes.class <- relabund.top.classes%>%
group_by(Sample, Class)%>%
mutate(ClassAbundance = sum(Abundance))%>%
distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Phylum, Family, Class)
relabund.top.classes.class.print <- summarySE(relabund.top.classes.class, measurevar="ClassAbundance", groupvars=c("Class"))
relabund.top.classes.class.print[,c("Class","N","ClassAbundance")]
## Class N ClassAbundance
## 1 [Chloracidobacteria] 156 0.0242780076
## 2 [Pedosphaerae] 312 0.0008576936
## 3 [Saprospirae] 156 0.2600740284
## 4 Acidimicrobiia 572 0.0142314956
## 5 Acidobacteria-6 156 0.0087173757
## 6 Actinobacteria 1092 0.0008919558
## 7 Alphaproteobacteria 1196 0.1189849423
## 8 Armatimonadia 52 0.0043355902
## 9 Bacteroidia 468 0.0016047794
## 10 BD1-5 52 0.0036947445
## 11 Betaproteobacteria 728 0.1394166181
## 12 Chloroplast 156 0.0387461867
## 13 Clostridia 624 0.0008200079
## 14 Cytophagia 260 0.0479689304
## 15 Deinococci 156 0.0590344529
## 16 Deltaproteobacteria 1352 0.0199104462
## 17 Flavobacteriia 208 0.0494758417
## 18 Gammaproteobacteria 1664 0.0668946728
## 19 Gemmatimonadetes 156 0.0019508196
## 20 OM190 52 0.0040714493
## 21 OPB56 52 0.0007609416
## 22 Phycisphaerae 104 0.0016755586
## 23 Planctomycetia 260 0.0069005428
## 24 Solibacteres 208 0.0118832190
## 25 Sphingobacteriia 156 0.0073552263
## 26 Synechococcophycideae 260 0.0354155284
## 27 TA18 52 0.0015786666
## 28 TM7-1 52 0.0018488588
## 29 Verrucomicrobiae 52 0.0026770285
## 30 ZB2 52 0.0007206090
# Summary of class abundance of top 30 classes.
relabund.top.classes.class.est <- summarySE(relabund.top.classes.class, measurevar="ClassAbundance", groupvars=c("Site","Date", "Class"))
head(relabund.top.classes.class.est)
## Site Date Class N ClassAbundance sd
## 1 North 172 [Chloracidobacteria] 9 0.039306658 0.0176302507
## 2 North 172 [Pedosphaerae] 18 0.001325860 0.0001686127
## 3 North 172 [Saprospirae] 9 0.191751831 0.0190289005
## 4 North 172 Acidimicrobiia 33 0.038384578 0.0085857752
## 5 North 172 Acidobacteria-6 9 0.003818652 0.0006679931
## 6 North 172 Actinobacteria 63 0.003232369 0.0001236175
## se ci
## 1 5.876750e-03 1.355181e-02
## 2 3.974240e-05 8.384913e-05
## 3 6.342967e-03 1.462691e-02
## 4 1.494592e-03 3.044383e-03
## 5 2.226644e-04 5.134650e-04
## 6 1.557434e-05 3.113266e-05
relabund.top.classes.class.est$Date <- as.character(relabund.top.classes.class.est$Date)
relabund.top.classes.class.est$Date <- as.numeric(relabund.top.classes.class.est$Date)
# Plot summary of class abundance of top 30 classes.
p <- ggplot(relabund.top.classes.class.est, aes(x=Date, y=ClassAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Find all classes
all.classes <- sort(get_taxa_unique(biom.relabund, "Class"), decreasing=FALSE)
biom.relabund.all.classes <- subset_taxa(biom.relabund, Class %in% as.factor(all.classes))
biom.relabund.all.classes <- psmelt(biom.relabund.all.classes)
biom.relabund.all.classes.class <- biom.relabund.all.classes%>%
group_by(Sample, Class)%>%
mutate(ClassAbundance = sum(Abundance))%>%
distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Family, Class)
biom.relabund.all.classes.class.est <- summarySE(biom.relabund.all.classes.class, measurevar="ClassAbundance", groupvars=c("Site","Date", "Class"))
head(biom.relabund.all.classes.class.est)
## Site Date Class N ClassAbundance sd
## 1 North 172 [Brachyspirae] 6 0.000000e+00 0.000000e+00
## 2 North 172 [Brevinematae] 3 0.000000e+00 0.000000e+00
## 3 North 172 [Chloracidobacteria] 9 3.930666e-02 1.763025e-02
## 4 North 172 [Cloacamonae] 6 0.000000e+00 0.000000e+00
## 5 North 172 [Fimbriimonadia] 3 2.686822e-05 1.191337e-05
## 6 North 172 [Lentisphaeria] 6 1.561697e-06 2.419371e-06
## se ci
## 1 0.000000e+00 0.000000e+00
## 2 0.000000e+00 0.000000e+00
## 3 5.876750e-03 1.355181e-02
## 4 0.000000e+00 0.000000e+00
## 5 6.878186e-06 2.959445e-05
## 6 9.877040e-07 2.538974e-06
biom.relabund.all.classes.class.est$Date <- as.character(biom.relabund.all.classes.class.est$Date)
biom.relabund.all.classes.class.est$Date <- as.numeric(biom.relabund.all.classes.class.est$Date)
p <- ggplot(biom.relabund.all.classes.class.est, aes(x=Date, y=ClassAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Data downloaded from
data <- read.csv("north_temperate_lakes_lter__daily_water_temperature_-_lake_mendota.csv", header=T)
head(data)
## sampledate year4 month daynum depth wtemp flag_wtemp
## 1 2006-06-27 2006 6 178 0.0 22.205 N
## 2 2006-06-27 2006 6 178 0.5 22.202 N
## 3 2006-06-27 2006 6 178 1.0 22.244 N
## 4 2006-06-27 2006 6 178 1.5 22.219 N
## 5 2006-06-27 2006 6 178 2.0 22.259 N
## 6 2006-06-27 2006 6 178 2.5 22.214 N
nolegend <- theme(legend.position="none")
Jdays <- c(172,178,185,199,206,214)
# Subset all wtemp at or below 1.5 meters (sample depth up to shore).
clado.depth <- subset(data, depth<=1.5, select = year4:wtemp)
# All years (2006-2016), full year.
p <- qplot(clado.depth$daynum, y = clado.depth$wtemp)
p <- p + geom_point(aes(colour = clado.depth$year4), size=1, alpha = 0.8) +
scale_y_continuous(limits = c(4,30))+
xlab("Date") + ylab("Temperature (°C)")+
geom_vline(xintercept=Jdays, colour="darkgrey", linetype = "longdash") +
guides(color=guide_legend(title="Year")) +
scale_colour_gradient(low = "darkred")
p+ theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# All years (2006-2016), sample dates ± 25 days
clado.depth.zoom <- subset(clado.depth, daynum>=150 & daynum<=250, select = year4:wtemp)
p <- qplot(clado.depth.zoom$daynum, y = clado.depth.zoom$wtemp) +
scale_y_continuous(limits = c(10,30))+
geom_point(aes(colour = clado.depth.zoom$year4), size=1, alpha = 0.8) +
labs(title = "Water Temperature (°C) <1.5m Depth, Lake Mendota", x="Date", y="Temperature °C") +
geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") +
guides(color=guide_legend(title="Year")) + scale_colour_gradient(low = "darkred")
p+ theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Smarter way to make grid of plots.
p <- ggplot(clado.depth.zoom, aes(daynum, wtemp))
p <- p + geom_point(size = 0.5) +
ylim(15,30) +
facet_wrap(~year4, ncol = 3) +
geom_vline(xintercept=Jdays, colour="grey30", linetype = "longdash") +
guides(color=guide_legend(title="Year")) +
labs(title = "Water Temperature (°C) <1.5m Depth, Lake Mendota", x="Date", y="Temperature °C")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

clado.depth.not14 <- subset(clado.depth, year4!=2014)
clado.depth.est.not14 <- summarySE(clado.depth.not14, measurevar= "wtemp", groupvars=c("daynum"))
head(clado.depth.est.not14)
## daynum N wtemp sd se ci
## 1 89 4 5.18050 0.039076847 0.019538424 0.062179984
## 2 90 4 4.85075 0.015283433 0.007641717 0.024319353
## 3 91 4 4.86175 0.012711543 0.006355772 0.020226902
## 4 92 4 5.02750 0.007852813 0.003926406 0.012495577
## 5 93 4 5.11150 0.024365276 0.012182638 0.038770591
## 6 94 4 5.12750 0.005066228 0.002533114 0.008061499
p <- ggplot(clado.depth.est.not14, aes(x=daynum, y=wtemp)) +
geom_point(size = 0.5) +
geom_errorbar(aes(ymin=wtemp-se, ymax=wtemp+se))+
geom_vline(xintercept=Jdays, colour="darkgrey", linetype = "longdash") +
labs(x="Date", y="Temperature (°C) depths ≤1.5m")
p + theme_bw() +
scale_y_continuous(limits = c(10,26))+
scale_x_continuous(limits = c(120,320))+
theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

clado.depth.deep <- subset(clado.depth, depth>0)
clado.depth.not14 <- subset(clado.depth.deep, year4!=2014)
clado.depth.est.not14 <- summarySE(clado.depth.not14, measurevar= "wtemp", groupvars=c("daynum", "year4"))
clado.depth.est.not14.est <- summarySE(clado.depth.est.not14, measurevar= "wtemp", groupvars=c("daynum"))
head(clado.depth.est.not14.est)
## daynum N wtemp sd se ci
## 1 89 1 5.166000 NA NA NA
## 2 90 1 4.843333 NA NA NA
## 3 91 1 4.855667 NA NA NA
## 4 92 1 5.028000 NA NA NA
## 5 93 1 5.123667 NA NA NA
## 6 94 1 5.130000 NA NA NA
p <- ggplot(clado.depth.est.not14.est, aes(x=daynum, y=wtemp)) +
geom_point(size = 0.5) +
geom_errorbar(aes(ymin=wtemp-se, ymax=wtemp+se))+
geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") +
labs(x="Date", y="Temperature (°C), 0.5-1.5m")
p + theme_bw() +
scale_y_continuous(limits = c(10,26))+
scale_x_continuous(limits = c(120,320))+
theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

clado.depth.deep <- subset(clado.depth, depth>0)
clado.depth.not14 <- subset(clado.depth.deep, year4!=2014)
clado.depth.est.not14 <- summarySE(clado.depth.not14, measurevar= "wtemp", groupvars=c("daynum", "year4"))
clado.depth.est.not14.est <- summarySE(clado.depth.est.not14, measurevar= "wtemp", groupvars=c("daynum"))
head(clado.depth.est.not14.est)
## daynum N wtemp sd se ci
## 1 89 1 5.166000 NA NA NA
## 2 90 1 4.843333 NA NA NA
## 3 91 1 4.855667 NA NA NA
## 4 92 1 5.028000 NA NA NA
## 5 93 1 5.123667 NA NA NA
## 6 94 1 5.130000 NA NA NA
p <- ggplot(clado.depth.est.not14.est, aes(x=daynum, y=wtemp)) +
geom_point(size = 0.5) +
geom_errorbar(aes(ymin=wtemp-se, ymax=wtemp+se))+
geom_vline(xintercept=Jdays, colour="grey10", linetype = "longdash") +
labs(x="Date", y="Temperature (°C), 0.5-1.5m")
p + theme_bw() +
scale_y_continuous(limits = c(10,26))+
scale_x_continuous(limits = c(120,320))+
theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Find genera of interest and subset biom.relabund.
genofint <- read.table(file = "taxa-of-interest/genera-of-interest.txt")
genofint.list <- as.vector(genofint$V1)
biom.relabund.subset.genofint = subset_taxa(biom.relabund, Genus %in% genofint.list)
biom.relabund.subset.genofint.taxa <- subset_taxa(biom.relabund.subset.genofint, Genus %in% as.factor(genofint.list))
biom.relabund.subset.genofint.taxa
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6763 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 6763 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6763 tips and 6759 internal nodes ]
relabund.genofint <- psmelt(biom.relabund.subset.genofint.taxa)
relabund.genofint.genus <- relabund.genofint%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.genofint.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
##
## Sample GenusAbundance TreatmentGroup Site Date Family
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C206N2 0.07541493 Late North 206 Thermaceae
## 2 C172S1 0.10894857 Early South 172 Cytophagaceae
## 3 C172N3 0.12464218 Early North 172 Flavobacteriaceae
## 4 C172N1 0.08012805 Early North 172 Flavobacteriaceae
## 5 C214N1 0.05272836 Late North 214 Thermaceae
## 6 C206N3 0.05263943 Late North 206 Thermaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Summary of genus abundance of genera of interest.
relabund.genofint.genus.est <- summarySE(relabund.genofint.genus, measurevar="GenusAbundance", groupvars=c("Site","Date","Genus"))
head(relabund.genofint.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Armatimonas 3 0.008217901 0.0037601213 0.0021709070
## 2 North 172 Cellvibrio 3 0.005839999 0.0066066072 0.0038143264
## 3 North 172 CM44 3 0.008832952 0.0005298073 0.0003058844
## 4 North 172 Dechloromonas 3 0.001927056 0.0026318679 0.0015195096
## 5 North 172 Flavobacterium 3 0.085333723 0.0369814367 0.0213512424
## 6 North 172 Flectobacillus 3 0.010964250 0.0028549117 0.0016482840
## ci
## 1 0.009340659
## 2 0.016411722
## 3 0.001316114
## 4 0.006537922
## 5 0.091866981
## 6 0.007091994
relabund.genofint.genus.est$Date <- as.character(relabund.genofint.genus.est$Date)
relabund.genofint.genus.est$Date <- as.numeric(relabund.genofint.genus.est$Date)
# Plot summary of genus abundance of genera of interest.
p <- ggplot(relabund.genofint.genus.est, aes(x=Date, y=GenusAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + ylab("Relative Abundance")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Plot summary of genus abundance of genera of interest.
p <- ggplot(relabund.genofint.genus.est, aes(x=Date, y=GenusAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_manual(values=cbPalette)
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + ylab("Relative Abundance")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Find classes_zulk of interest and subset biom.relabund.
classzulk <- read.csv(file = "taxa-of-interest/classes-zulkifly.csv", header = T)
classzulk.list <- as.vector(classzulk$Class)
biom.relabund.subset.classzulk = subset_taxa(biom.relabund, Class %in% classzulk.list)
biom.relabund.subset.classzulk.taxa <- subset_taxa(biom.relabund.subset.classzulk, Class %in% as.factor(classzulk.list))
biom.relabund.subset.classzulk.taxa
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 37840 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 37840 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 37840 tips and 37826 internal nodes ]
relabund.classzulk <- psmelt(biom.relabund.subset.classzulk.taxa)
relabund.classzulk.class <- relabund.classzulk%>%
group_by(Sample, Class)%>%
mutate(ClassAbundance = sum(Abundance))%>%
distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Phylum, Family, Class)
length(relabund.classzulk.class$Class); head(relabund.classzulk.class)
## [1] 10816
## Source: local data frame [6 x 10]
## Groups: Sample, Class [6]
##
## Sample ClassAbundance TreatmentGroup Site Date Phylum
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C206N3 0.2638815 Late North 206 Proteobacteria
## 2 C178S2 0.1751127 Early South 178 Proteobacteria
## 3 C206N2 0.1210454 Late North 206 [Thermi]
## 4 C199S1 0.1578279 Late South 199 Proteobacteria
## 5 C185S1 0.1641425 Early South 185 Proteobacteria
## 6 C178N2 0.2485806 Early North 178 Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Class <fctr>, Sample <chr>,
## # Class <fctr>
relabund.classzulk.class <- subset(relabund.classzulk.class, Class!="Chloroplast")
length(relabund.classzulk.class$Class); head(relabund.classzulk.class)
## [1] 10816
## Source: local data frame [6 x 10]
## Groups: Sample, Class [6]
##
## Sample ClassAbundance TreatmentGroup Site Date Phylum
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C206N3 0.2638815 Late North 206 Proteobacteria
## 2 C178S2 0.1751127 Early South 178 Proteobacteria
## 3 C206N2 0.1210454 Late North 206 [Thermi]
## 4 C199S1 0.1578279 Late South 199 Proteobacteria
## 5 C185S1 0.1641425 Early South 185 Proteobacteria
## 6 C178N2 0.2485806 Early North 178 Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Class <fctr>, Sample <chr>,
## # Class <fctr>
# Summary of class abundance of classes_zulk of interest.
relabund.classzulk.class.est <- summarySE(relabund.classzulk.class, measurevar ="ClassAbundance", groupvars=c("Site","Date","Phylum","Class"))
length(relabund.classzulk.class.est$Class)
## [1] 540
relabund.classzulk.class.est$Date <- as.character(relabund.classzulk.class.est$Date)
relabund.classzulk.class.est$Date <- as.numeric(relabund.classzulk.class.est$Date)
# Plot MERGED summary of class abundance of classes_zulk of interest and Zulkifly et al. class abundances.
relabund.classzulk.class.est.merge <- merge(relabund.classzulk.class.est, classzulk, by = "Class")
head(relabund.classzulk.class.est.merge)
## Class Site Date Phylum N ClassAbundance
## 1 [Chloracidobacteria] North 206 Acidobacteria 9 0.02491042
## 2 [Chloracidobacteria] North 214 Acidobacteria 9 0.02066233
## 3 [Chloracidobacteria] South 214 Acidobacteria 9 0.01934126
## 4 [Chloracidobacteria] North 185 Acidobacteria 9 0.03053088
## 5 [Chloracidobacteria] North 178 Acidobacteria 9 0.03048632
## 6 [Chloracidobacteria] South 199 Acidobacteria 9 0.01520458
## sd se ci ClassAbundance_zulkifly
## 1 0.0030860368 0.0010286789 0.0023721379 0
## 2 0.0017004535 0.0005668178 0.0013070843 0
## 3 0.0007875231 0.0002625077 0.0006053439 0
## 4 0.0018881869 0.0006293956 0.0014513889 0
## 5 0.0061337576 0.0020445859 0.0047148235 0
## 6 0.0015427929 0.0005142643 0.0011858956 0
p <- ggplot(relabund.classzulk.class.est.merge, aes(x=Date, y=ClassAbundance, color = Site, shape = Site)) + geom_point(size = 2) + geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120)) + geom_hline(aes(yintercept = ClassAbundance_zulkifly), linetype="dashed")+ scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(strip.text = element_text(face = "italic"))

# Summary of class abundance of classes_zulk of interest.
relabund.classzulk.class.est.nosplit <- summarySE(relabund.classzulk.class.est, measurevar ="ClassAbundance", groupvars=c("Phylum","Class"))
head(relabund.classzulk.class.est.nosplit)
## Phylum Class N ClassAbundance sd
## 1 [Thermi] Deinococci 18 0.058402291 0.022587941
## 2 Acidobacteria [Chloracidobacteria] 18 0.023908228 0.008663260
## 3 Acidobacteria Acidobacteria-6 18 0.008565357 0.006008876
## 4 Acidobacteria Solibacteres 18 0.011663895 0.008689581
## 5 Actinobacteria Acidimicrobiia 18 0.014017705 0.010998519
## 6 Actinobacteria Actinobacteria 18 0.000871626 0.001038802
## se ci
## 1 0.0053240287 0.011232719
## 2 0.0020419499 0.004308138
## 3 0.0014163056 0.002988144
## 4 0.0020481538 0.004321227
## 5 0.0025923759 0.005469435
## 6 0.0002448479 0.000516584
relabund.classzulk.class.est.merge.nosplit <- merge(relabund.classzulk.class.est.nosplit, classzulk, by ="Class")
relabund.classzulk.class.est.merge.nosplit
## Class Phylum N ClassAbundance sd
## 1 [Chloracidobacteria] Acidobacteria 18 0.0239082284 0.0086632598
## 2 [Pedosphaerae] Verrucomicrobia 18 0.0008585850 0.0006050463
## 3 [Saprospirae] Bacteroidetes 18 0.2606628866 0.0436653804
## 4 Acidimicrobiia Actinobacteria 18 0.0140177053 0.0109985194
## 5 Acidobacteria-6 Acidobacteria 18 0.0085653572 0.0060088756
## 6 Actinobacteria Actinobacteria 18 0.0008716260 0.0010388017
## 7 Alphaproteobacteria Proteobacteria 18 0.1193086584 0.0258636402
## 8 Armatimonadia Armatimonadetes 18 0.0043316962 0.0031456709
## 9 Bacteroidia Bacteroidetes 18 0.0015860106 0.0013530463
## 10 BD1-5 GN02 18 0.0036777802 0.0020102991
## 11 Betaproteobacteria Proteobacteria 18 0.1382959544 0.0300836285
## 12 Clostridia Firmicutes 18 0.0007966420 0.0012419290
## 13 Cytophagia Bacteroidetes 18 0.0471742356 0.0352465522
## 14 Deinococci [Thermi] 18 0.0584022914 0.0225879408
## 15 Deltaproteobacteria Proteobacteria 18 0.0202432199 0.0092228614
## 16 Flavobacteriia Bacteroidetes 18 0.0496626064 0.0197355920
## 17 Gammaproteobacteria Proteobacteria 18 0.0667539305 0.0486753799
## 18 Gemmatimonadetes Gemmatimonadetes 18 0.0019379966 0.0010921809
## 19 Nitrospira Nitrospirae 18 0.0003613375 0.0003812892
## 20 OM190 Planctomycetes 18 0.0040188935 0.0023178795
## 21 OPB56 Chlorobi 18 0.0008028522 0.0009068642
## 22 Phycisphaerae Planctomycetes 18 0.0017869873 0.0018118456
## 23 Planctomycetia Planctomycetes 18 0.0068856180 0.0034767750
## 24 Solibacteres Acidobacteria 18 0.0116638949 0.0086895806
## 25 Sphingobacteriia Bacteroidetes 18 0.0073246136 0.0036968485
## 26 Synechococcophycideae Cyanobacteria 18 0.0356335002 0.0400623018
## 27 TA18 Proteobacteria 18 0.0015533374 0.0010169741
## 28 TM7-1 TM7 18 0.0018127999 0.0012590898
## 29 Verrucomicrobiae Verrucomicrobia 18 0.0027409754 0.0017111353
## 30 ZB2 OD1 18 0.0007276926 0.0005640772
## se ci ClassAbundance_zulkifly
## 1 2.041950e-03 0.0043081377 0.00
## 2 1.426108e-04 0.0003008824 0.00
## 3 1.029203e-02 0.0217142828 0.00
## 4 2.592376e-03 0.0054694350 0.00
## 5 1.416306e-03 0.0029881435 0.00
## 6 2.448479e-04 0.0005165840 0.00
## 7 6.096118e-03 0.0128616857 0.17
## 8 7.414417e-04 0.0015643053 0.00
## 9 3.189161e-04 0.0006728541 0.01
## 10 4.738320e-04 0.0009996982 0.00
## 11 7.090779e-03 0.0149602365 0.21
## 12 2.927255e-04 0.0006175968 0.00
## 13 8.307692e-03 0.0175276980 0.00
## 14 5.324029e-03 0.0112327187 0.03
## 15 2.173849e-03 0.0045864211 0.07
## 16 4.651724e-03 0.0098142790 0.07
## 17 1.147290e-02 0.0242056969 0.12
## 18 2.574295e-04 0.0005431288 0.01
## 19 8.987072e-05 0.0001896106 0.01
## 20 5.463294e-04 0.0011526543 0.00
## 21 2.137499e-04 0.0004509730 0.00
## 22 4.270561e-04 0.0009010096 0.00
## 23 8.194837e-04 0.0017289595 0.05
## 24 2.048154e-03 0.0043212268 0.00
## 25 8.713555e-04 0.0018383995 0.16
## 26 9.442775e-03 0.0199225140 0.00
## 27 2.397031e-04 0.0005057293 0.00
## 28 2.967703e-04 0.0006261306 0.00
## 29 4.033185e-04 0.0008509276 0.04
## 30 1.329543e-04 0.0002805090 0.00
df <- relabund.classzulk.class.est.merge.nosplit
limits <- aes(ymin = ClassAbundance-se, ymax= ClassAbundance+se)
p <- ggplot(df, aes(Class))
p <- p +
ylab("Relative Abundance")+
facet_grid(~Phylum, scales = "free", space="free") +
guides(color=guide_legend(title="Growth Season")) +
#theme_bw() +
theme(legend.position="top")+
theme(strip.text.x = element_text(size = 8, angle = 90, face="italic")) +
theme(axis.text.x = element_text(size = 10, angle = 55, hjust=1),axis.text.y = element_text(size = 10))+
geom_point(aes(y = ClassAbundance, color = "2014 (this study) "), size = 3, alpha = 0.5) +
geom_errorbar(limits, color = "black", width=0.5)+
geom_point(aes(y = ClassAbundance_zulkifly, color = "2011 (Zulkifly et al., 2012)"), size = 3, alpha = 0.5)+
scale_colour_manual(values=cbPalette)
p

#write.csv(relabund.classzulk.class.est.merge.nosplit, file = "~/Dropbox/clado-manuscript/Mikes_MS_Data/braus-zulkifly.csv")
df <- read.csv(file = "~/Dropbox/clado-manuscript/Mikes_MS_Data/braus-zulkifly.csv")
limits <- aes(ymin = ClassAbundance-se, ymax= ClassAbundance+se)
p <- ggplot(df, aes(Class, color = Year))
p <- p +
geom_point(aes(y = ClassAbundance), size = 2) +
geom_errorbar(limits, color = "black", width=0.5) +
ylab("Relative Abundance") +
facet_grid(~Phylum, scales = "free", space="free") +
guides(color=guide_legend(title="Growth Season")) +
theme_bw() +
theme(legend.position="top") +
theme(strip.text.x = element_text(size = 8, angle = 90, face="italic")) +
theme(axis.text.x = element_text(size = 10, angle = 55, hjust=1),axis.text.y = element_text(size = 10)) +
scale_colour_manual(values=c("#b5b5b5", "#212121"))
p+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
